/* Construct real values for CART-predicted datasets for the FHS industries. 

   Merge the NBER-CES deflators, the capital stock 
   variables constructed from BEA and BLS capital data, the 
   entry/exit flags created in Create_entry_exit_flags.sas,
   and the industry cost shares.

	input files: 
		fhs.sugar_predicted
		fhs.ice_predicted
		fhs.gas_predicted
		fhs.floor_predicted
		fhs.carbon_predicted
		fhs.plywood_predicted
		fhs.coffee_predicted
		fhs.bread_predicted
		fhs.boxes_predicted

		nberces.naics5809_new
		bea.bea_naics
		bls.BLS_capital_FHS
		fhs.bcd0207
		indcs.cost_shares_FHS

	output files:
		fhs.physugarf_predicted
		fhs.phyicef_predicted
		fhs.phygasf_predicted
		fhs.phyfloorf_predicted
		fhs.phycarbonf_predicted
		fhs.phyplywoodf_predicted
		fhs.phycoffeef_predicted
		fhs.phybreadf_predicted
		fhs.phyboxesf_predicted

*/

/* OPTIONS OBS=500  NOREPLACE  ; */ 

%include "ASMimplibs.sas";


/***
*** PART 1: GRAB VARIABLES WE'LL NEED LATER FROM THE PRODUCT TRAILER DATASETS.
***/

%macro getind(i);

   /* Purpose: attach survu_id-level product-trailer variables (ppsr1,
      phyflag) to the predicted dataset of one industry.

      Parameter:
        i - industry stem (e.g. ice).  Reads fhs.<i>f2002, fhs.<i>f2007 and
            fhs.<i>_predicted; writes WORK datasets <i>2002, <i>2007, <i>
            and <i>_predicted. */

   /* One extract per census year, each stamped with its year. */
   data &i.2002 (keep=survu_id year ppsr1 phyflag);
      set fhs.&i.f2002;
      year = 2002;
   run;

   data &i.2007 (keep=survu_id year ppsr1 phyflag);
      set fhs.&i.f2007;
      year = 2007;
   run;

   /* Stack the two years into a single lookup table. */
   data &i;
      set &i.2002 &i.2007;
   run;

   proc sort data=&i;
      by survu_id year;
   run;

   proc sort data=fhs.&i._predicted out=&i._predicted;
      by survu_id year;
   run;

   /* Keep every record of the predicted file (and only those); rows that
      exist solely in the trailer extract are discarded. */
   data &i._predicted;
      merge &i
            &i._predicted (in=has_pred);
      by survu_id year;
      if not has_pred then delete;
   run;

%mend getind;

/* Build the WORK dataset <industry>_predicted for each industry to process.
   Only the ice industry is currently enabled; the calls for the remaining
   industries are commented out. */
*%getind(sugar);
%getind(ice);
*%getind(plywood);
*%getind(gas);
*%getind(floor);
*%getind(coffee);
*%getind(carbon);
*%getind(bread);
*%getind(boxes);
**%getind(conc);



/***
*** PART 2: Construct the real "flow" variables needed for TFP calculations
***/

/* Read in NBER-CES deflators for FHS industries */

/* Pull the 2002 and 2007 rows of the four NBER-CES price indexes.  The
   indexes are renamed with a _base97 suffix as they are read so that the
   plain names stay free for the rebased series built below.
   (Presumably the source indexes are 1997-based, as the suffix suggests.) */
data deflators (keep = naics year piship_base97 pimat_base97 pien_base97 piinv_base97);
  set nberces.naics5809_new (rename = (piship = piship_base97
                                       pimat  = pimat_base97
                                       pien   = pien_base97
                                       piinv  = piinv_base97));
  if year in (2002, 2007);
run;

/* One row per industry holding the 2002 level of each index; these are the
   denominators used to convert the indexes to a base year of 2002. */
data deflators02 (keep = naics piship02_base97 pimat02_base97 pien02_base97 piinv02_base97);
  set deflators;
  where year = 2002;
  piship02_base97 = piship_base97;
  pimat02_base97  = pimat_base97;
  pien02_base97   = pien_base97;
  piinv02_base97  = piinv_base97;
run;

proc sort data=deflators;
  by naics;
run;

proc sort data=deflators02;
  by naics;
run;

/* Rebase: divide each index by its own 2002 value.  The numeric NAICS code
   is written out as a zero-padded 6-character string, which replaces the
   numeric variable under the name naics in the output dataset. */
data deflators (keep   = naics_char year piship pimat pien piinv
                rename = (naics_char = naics));
  merge deflators02 deflators;
  by naics;
  piship = piship_base97 / piship02_base97;
  pimat  = pimat_base97  / pimat02_base97;
  pien   = pien_base97   / pien02_base97;
  piinv  = piinv_base97  / piinv02_base97;
  naics_char = put(naics, z6.);
run;

proc sort data=deflators;
  by naics year;
run;

%macro create_real_values(ind);

 /* Purpose: deflate the nominal flow variables of one stacked plant-level
    industry dataset and build the real output, energy, labor and materials
    measures (plus their logs) needed for the TFP calculations.

    Parameter:
      ind - name of a WORK dataset that carries a character naics code and
            year.  The dataset is sorted in place by naics year.

    Reads : WORK.&ind, WORK.deflators (base-2002 price indexes).
    Writes: WORK.&ind._real, with added variables naics3, price, q, lnq,
            e, le, pw_wage, pwhours, lth, matq, lm. */

 proc sort data=&ind; by naics year; run;

 /* Merge the (base 2002) deflators with the stacked plant-level industry data.
    Every plant record is kept, whether or not a deflator row matched. */

 data &ind._real ;
   merge &ind (in=infhs) deflators ;
   by naics year;
   if infhs;
 run;

 data &ind._real ;
  set &ind._real ;
  naics3=substr(naics,1,3);
  if naics3="312" then naics3="311";  /* Need to do this because BEA and BLS capital data aggregate 311 and 312. */

  /* Need to reconstruct price for CART-completed data. */
  if pqs>0 then price=pv/pqs;
  label price="Constructed Price=PV/PQS";

  /* Output.  We could also try including changes in inventories in our
     measure of output, but for the first pass just use shipments. */
  q = tvs/piship;
  if q>0 then lnq = log(q);
  else lnq = .;

  /* Energy: deflated sum of cf and ee. */
  e = sum(cf,ee)/pien;
  if e > 0 then le = log(e);
  else le = .;

  /* Labor input: sw divided by the implied hourly wage ww/ph.
     NOTE(review): ww, ph and sw look like production-worker wages,
     production-worker hours and total salaries and wages - confirm against
     the data dictionary. */
  if ph > 0 then pw_wage = ww/ph;
  else pw_wage = .;
  if pw_wage > 0 then pwhours = sw/pw_wage;
  else pwhours = .;
  if pwhours > 0 then lth = log(pwhours);
  else lth = .;

  /* Materials: net energy costs out of cm when cm exceeds them, otherwise
     fall back to cm itself.  (The original also assigned matq once before
     this test; that value was always overwritten, so it has been removed.) */
  if cm > sum(cf,ee)
  then matq = sum(cm,-1*cf,-1*ee)/pimat;
  else matq = cm/pimat;
  if matq > 0 then lm = log(matq);
  else lm = .;

 run;

%mend create_real_values;


* Create the real flow variables; 

/* Ice: tag every predicted record with the 6-character industry code used
   as the merge key for the deflators, then build the real flow variables. */
data icef;
  set ice_predicted;
  naics = "312113";
run;

%create_real_values(icef);

/************
data sugarf; set sugar_predicted; naics="311312"; run;
%create_real_values(sugarf);

data plywoodf; set plywood_predicted; naics="321211"; run;
%create_real_values(plywoodf);

data gasf; set gas_predicted; naics="324110"; run;
%create_real_values(gasf);

data floorf; set floor_predicted; naics="321918"; run;
%create_real_values(floorf);

data coffeef; set coffee_predicted; naics="311920"; run;
%create_real_values(coffeef);

data carbonf; set carbon_predicted; naics="325182"; run;
%create_real_values(carbonf);

data breadf; set bread_predicted; naics="311812"; run;
%create_real_values(breadf);

data boxesf; set boxes_predicted; naics="322211"; run;
%create_real_values(boxesf);
*************/

/***data concf; set conc_predicted;run;
**%create_real_values(concf);
***/

/***
**** PART 3: Merge in the BEA and BLS capital data and construct the capital stock variables ;
***/


 /* BEA capital data: keep the 2002 and 2007 rows of the listed BEA
    industry groups. */
 data beacapital (keep = naics_bea naics_bea7 year nkcst gkhst);
   set bea.bea_naics;
   naics_bea7 = substr(naics_bea, 1, 7);
   if year in (2002, 2007)
      and naics_bea7 in ('311-312','321    ','322    ','324    ','325    ','327    ');
 run;

 /* The BEA capital data aggregates some of the NAICS3 categories,
    so we need to create a new variable for NAICS3. */
 data beacapital (keep = naics3 year nkcst gkhst);
   set beacapital;
   length naics3 $ 3;
   if naics_bea7 ne "311-312" then naics3 = substr(naics_bea7, 1, 3);
   else naics3 = "311";            /* Food and tobacco products */
 run;

 proc sort data=beacapital;
   by naics3 year;
 run;

 /* BLS investment price deflator for all assets: restrict to 2002 and 2007,
    keep only the merge keys and the deflator, and order by the merge keys. */
 proc sort data = bls.BLS_capital_FHS (where = (year in (2002, 2007)))
           out  = bls_capital_FHS (keep = naics3 year piinv_base02);
   by naics3 year;
 run;

%macro create_real_capital(ind);

 /* Purpose: attach the BEA capital series and the BLS investment deflator
    to WORK.&ind._real and construct the real capital stock k and its log lk.

    Parameter:
      ind - dataset stem; WORK.&ind._real must carry naics3 and year.

    Reads : WORK.&ind._real, WORK.beacapital, WORK.bls_capital_FHS.
    Writes: WORK.&ind._real (re-sorted by naics3 year, with nkcst, gkhst,
            piinv_base02, k and lk added). */

 proc sort data=&ind._real; by naics3 year; run;

 /* Plant records are always kept, matched or not. */
 data &ind._real;
  merge &ind._real (in=infhs) beacapital;
  by naics3 year;
  if infhs;
 run;

 data &ind._real;
  merge &ind._real (in=infhs) bls_capital_FHS;
  by naics3 year;
  if infhs;
 run;

  /* Here I'm using the historical vs. current BEA data for structures
    for total capital assets.  The CMF no longer breaks out book values of
    assets by equipment and structures.
    To convert this to 2002 dollars, I am using the BLS investment price
    deflator for "all assets". */

 data &ind._real;
  set &ind._real;
      /** k=tae*(nkcst/gkhst)/piinv_base02; */
      k=tab*(nkcst/gkhst)/piinv_base02;
      /* Reset lk explicitly when k is not positive, as is done for the
         other log variables (lnq, le, lth, lm), so that a pre-existing lk
         value can never be carried through. */
      if k>0 then lk = log(k);
      else lk = .;
 run;

%mend create_real_capital;

/* Construct the real capital stock for each industry to process.  Only the
   ice industry is currently enabled; the other calls are commented out. */
%create_real_capital(icef);

*%create_real_capital(sugarf);
*%create_real_capital(plywoodf);
*%create_real_capital(gasf);
*%create_real_capital(floorf);
*%create_real_capital(coffeef);
*%create_real_capital(carbonf);
*%create_real_capital(breadf);
*%create_real_capital(boxesf);
/**%create_real_capital(concf); **/

  
/****
***** PART 4: Merge the entry/continuer dummies with the FHS dataset.
*****  (We already have the exit dummy on the imputed data.) 
*****/

/* Work copy of the entry/exit flag file without te02, te07 and exit02.
   The copy is merged BY survu_id in %merge_entryexit, so it is put in
   survu_id order here rather than relying on the stored order of
   fhs.bcd0207 (a BY-merge fails on input that is not in BY order). */
proc sort data=fhs.bcd0207 out=bcd0207 (drop = te02 te07 exit02);
 by survu_id;
run;

%macro merge_entryexit(ind);

  /* Purpose: add the variables of WORK.bcd0207 to the plant-level data.

     Parameter:
       ind - dataset stem; WORK.&ind._real is sorted in place by survu_id.

     Writes WORK.&ind._real_w_bcd, which holds every record of
     WORK.&ind._real; rows found only in bcd0207 are discarded. */

  proc sort data=&ind._real;
    by survu_id;
  run;

  data &ind._real_w_bcd;
    merge &ind._real (in=from_fhs)
          bcd0207;
    by survu_id;
    if not from_fhs then delete;
  run;

%mend merge_entryexit;

/* Merge the bcd0207 flags onto each industry to process.  Only the ice
   industry is currently enabled; the other calls are commented out. */
%merge_entryexit(icef);

*%merge_entryexit(sugarf);
*%merge_entryexit(plywoodf);
*%merge_entryexit(gasf);
*%merge_entryexit(floorf);
*%merge_entryexit(coffeef);
*%merge_entryexit(carbonf);
*%merge_entryexit(breadf);
*%merge_entryexit(boxesf);
/***%merge_entryexit(concf); ***/


/***
**** PART 5: Merge in the industry cost shares ;
***/

/* Work copy of the industry cost shares in which the numeric NAICS code is
   replaced by a zero-padded 6-character string of the same name, so that it
   can serve as a BY variable against the plant-level data. */
data cost_shares_FHS (drop = naics rename = (naics_char = naics));
  set indcs.cost_shares_FHS;
  naics_char = put(naics, z6.);
run;

proc sort data=cost_shares_FHS;
  by naics year;
run;

%macro merge_cs(ind);

  /* Purpose: merge the industry cost shares onto the plant-level data and
     write the final permanent dataset with variable labels.

     Parameter:
       ind - dataset stem; WORK.&ind._real_w_bcd is sorted in place by
             naics year.

     Reads : WORK.cost_shares_FHS, WORK.&ind._real_w_bcd.
     Writes: fhs.phy&ind._predicted (every plant record is kept, with or
             without a matching cost-share row). */

  proc sort data=&ind._real_w_bcd; by naics year; run;

  data fhs.phy&ind._predicted;
   merge cost_shares_FHS (in=incs) &ind._real_w_bcd (in=infhs);
   by naics year;
   if infhs;
  run;

  proc datasets library=fhs nolist;
   modify phy&ind._predicted;
    label k = 'Real capital stock, 2002 $K, adjusted from book values';
    label lk = 'Log of real capital stock, 2002 $K, adjusted from book values';
    label e = 'Real energy costs, 2002 $K, deflated sum of cost of fuels and cost of purchased electricity';
    label le = 'Log of real energy costs, 2002 $K';
    label matq = 'Real cost of materials, 2002 $K, (without energy)';
    label lm = 'Log of real cost of materials (without energy), 2002 $K';
    label q = 'Real total value of shipments, 2002 $K';
    label lnq = 'Log of real total value of shipments, 2002 $K';
    label lth = 'Log of total (production-worker-equivalent) hours';
    label pwhours = 'Total production-worker-equivalent hours'; 
    label year = 'year';
  run;
  /* PROC DATASETS supports RUN-group processing: RUN executes the pending
     statements but leaves the procedure active.  End it explicitly so the
     macro does not leave a procedure running after it returns. */
  quit;


%mend merge_cs;

/* Write the final fhs.phy<industry>_predicted dataset for each industry to
   process.  Only the ice industry is currently enabled; the other calls are
   commented out. */
%merge_cs(icef);

*%merge_cs(sugarf);
*%merge_cs(plywoodf);
*%merge_cs(gasf);
*%merge_cs(floorf);
*%merge_cs(coffeef);
*%merge_cs(carbonf);
*%merge_cs(breadf);
*%merge_cs(boxesf);
/***%merge_cs(concf);***/



